References

Background

Here we have 82 data points in the galaxy dataset, assumed to arise from an unknown 1-dimensional distribution. The aim is to fit a normal finite mixture model with a pre-specified number of latent clusters.

Assessment thus-far

Each chain seems to converge to something, but the mixing across the several chains is very poor. Is there still a label switching issue even though the means are ordered in the prior?

Load packages

## Data manipulation (dplyr/tidyr) and plotting (ggplot2)
library(tidyverse)
## Interface to Stan for Hamiltonian Monte Carlo sampling
library(rstan)
## devtools::install_github('jburos/biostan', build_vignettes = TRUE, dependencies = TRUE)
## library(biostan)
## Provides the galaxy velocity dataset used below
library(DPpackage)
## Fix the RNG seed so the sampling results are reproducible
set.seed(732565397)

Prepare data

data(galaxy, package = "DPpackage")
## Convert to a tibble and add transformed speed variables:
## log_speed for a log-scale view, k_speed (speed in 1000 km/s units)
## as the modeling scale. as_tibble() replaces the deprecated
## as_data_frame(), which previously emitted a once-per-session warning here.
galaxy <- galaxy %>%
    as_tibble() %>%
    mutate(log_speed = log(speed),
           k_speed = speed / 1000)
galaxy
## # A tibble: 82 x 3
##    speed log_speed k_speed
##    <dbl>     <dbl>   <dbl>
##  1  9172      9.12    9.17
##  2  9350      9.14    9.35
##  3  9483      9.16    9.48
##  4  9558      9.17    9.56
##  5  9775      9.19    9.78
##  6 10227      9.23   10.2 
##  7 10406      9.25   10.4 
##  8 16084      9.69   16.1 
##  9 16170      9.69   16.2 
## 10 18419      9.82   18.4 
## # … with 72 more rows
## Rug-style display of the observed speeds: one point per galaxy at a
## fixed height, with the y axis suppressed since it carries no information.
speed_plot <- ggplot(data = galaxy, mapping = aes(x = k_speed)) +
    geom_point(y = 0.5) +
    scale_y_continuous(limits = c(0, 1), breaks = NULL) +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
          legend.key = element_blank(),
          plot.title = element_text(hjust = 0.5),
          strip.background = element_blank())
speed_plot

The speed data seem to show some distinct clusters. We will use the following grid for later visualization.

## Evaluation grid on which each Stan model reports the log density
## (generated quantities log_f); also used by plot_draws() below.
grid_max <- 40
grid_min <- -20
grid_length <- 100

Single Normal

We will start with density estimation with a single normal distribution as a starting point.

Model

\[\begin{align*} y_{i} | (\mu, \sigma^{2}) &\sim N(\mu, \sigma^{2})\\ \\ p(\mu,\sigma^{2}) &= p(\mu|\sigma^{2})p(\sigma^{2})\\ &= N(\mu | m, s^{2}) Inverse-Gamma(\sigma^{2} | \alpha, \beta)\\ &= \left[ \frac{1}{\sqrt{2\pi s^{2}}} \exp \left( - \frac{(\mu - m)^{2}}{2 \times s^{2}} \right) \right] \left[ \frac{\beta^{\alpha}}{\Gamma(\alpha)} (\sigma^{2})^{-\alpha - 1} \exp \left( - \frac{\beta}{\sigma^{2}} \right) \right]\\ &\text{where}\\ m &= 0\\ s^{2} &= 10^{3}\\ \alpha &= 10^{-3}\\ \beta &= 10^{-3}\\ \end{align*} \]

Implementation

## Read the single-normal Stan program from file. biostan must be installed,
## but need not be attached since it is accessed via the :: operator.
normal_stan_code <- biostan::read_stan_file("./bayesianideas_density_normal.stan")
## Warning: replacing previous import 'rstan::loo' by 'rstanarm::loo' when loading 'biostan'
## Display the full Stan program for reference
biostan::print_stan_code(normal_stan_code, section = NULL)
## data { 
##     // Hyperparameters 
##     real alpha; 
##     real beta; 
##     real m; 
##     real s_squared; 
##  
##     // Define variables in data 
##     // Number of observations (an integer) 
##     int<lower=0> n; 
##     // Outcome (a real vector of length n) 
##     real y[n]; 
##  
##     // Grid evaluation 
##     real grid_max; 
##     real grid_min; 
##     int<lower=1> grid_length; 
## } 
##  
## transformed data { 
##     real s; 
##     real grid_step; 
##  
##     s = sqrt(s_squared); 
##     grid_step = (grid_max - grid_min) / (grid_length - 1); 
## } 
##  
## parameters { 
##     // Define parameters to estimate 
##     // Population mean (a real number) 
##     real mu; 
##     // Population variance (a positive real number) 
##     real<lower=0> sigma_squared; 
## } 
##  
## transformed parameters { 
##     // Population standard deviation (a positive real number) 
##     real<lower=0> sigma; 
##     // Standard deviation (derived from variance) 
##     sigma = sqrt(sigma_squared); 
## } 
##  
## model { 
##     // Prior part of Bayesian inference 
##     // Mean 
##     mu ~ normal(m, s); 
##     // sigma^2 has inverse gamma (alpha = 1, beta = 1) prior 
##     sigma_squared ~ inv_gamma(alpha, beta); 
##  
##     // Likelihood part of Bayesian inference 
##     // Outcome model N(mu, sigma^2) (use SD rather than Var) 
##     y ~ normal(mu, sigma); 
## } 
##  
## generated quantities { 
##  
##     real log_f[grid_length]; 
##  
##     for (g in 1:grid_length) { 
##         // Definiting here avoids reporting of these intermediates. 
##         real grid_value; 
##         grid_value = grid_min + grid_step * (g - 1); 
##         log_f[g] = normal_lpdf(grid_value | mu, sigma); 
##     } 
##  
## } 
## 

Fitting

Helper functions for here and later use.

## Print posterior summaries for the parameters of interest only,
## hiding the (long) grid of log_f generated quantities.
print_relevant_pars <- function(fit,
                                pars = c("mu", "sigma_squared", "Pi",
                                         "sigma", "lp__")) {
    print(fit, pars = pars)
}

## Draw one trace plot per parameter name, including warmup iterations.
## print() is called explicitly so the plots render inside the loop.
traceplot_all <- function(fit, pars = c("mu", "sigma", "Pi", "lp__")) {
    for (par_name in pars) {
        p <- traceplot(fit, inc_warmup = TRUE, pars = par_name)
        print(p)
    }
}

## Draw a separate pairs plot for each parameter group; divergent
## transitions show up as red points in rstan's pairs method.
pairs_plot_all <- function(fit, pars = c("mu", "sigma_squared", "Pi")) {
    for (par_name in pars) {
        pairs(fit, pars = par_name)
    }
}

plot_draws <- function(stan_fit) {
    ## Plot the posterior mean of the density (per chain) over the observed
    ## data points, faceted by chain, to visualize between-chain agreement.
    ## Note direct access to global variables: grid_min, grid_max,
    ## grid_length, and the galaxy data frame.
    grid_step <- (grid_max - grid_min) / (grid_length - 1)

    ## One row per (chain, iteration, grid point) with the density value.
    draw_data <- tidybayes::tidy_draws(stan_fit) %>%
        select(.chain, .iteration, .draw, starts_with("log_f")) %>%
        gather(key = key, value = value, starts_with("log_f")) %>%
        mutate(key = gsub("log_f|\\[|\\]", "", key) %>% as.integer(),
               ## Map the grid index directly to its grid value. This avoids
               ## the fragile factor(labels = ...) round-trip, which errors
               ## when any grid index is absent and loses precision through
               ## the character conversion.
               x = grid_min + grid_step * (key - 1),
               ## Back-transform from log density to density.
               value = exp(value))

    ## Posterior mean density at each grid point, separately by chain.
    summary_density <- draw_data %>%
        group_by(.chain, x) %>%
        summarize(value = mean(value)) %>%
        ungroup()

    ggplot(data = draw_data, mapping = aes(x = x, y = value,
           group = interaction(.chain, .iteration, .draw))) +
    ## geom_line(size = 0.1, alpha = 1/20) +
    geom_line(data = summary_density, mapping = aes(group = .chain), size = 0.5, color = "gray") +
    geom_point(data = galaxy, mapping = aes(x = k_speed, group = NULL), y = 0) +
    facet_wrap(~ .chain) +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
          legend.key = element_blank(),
          plot.title = element_text(hjust = 0.5),
          strip.background = element_blank())
}
## Fit the single-normal model with vague priors:
## mu ~ N(m = 0, s^2 = 10^3), sigma^2 ~ Inverse-Gamma(10^-3, 10^-3).
## 12 chains to match the later mixture fits.
normal_stan_fit <-
    rstan::stan(model_code = normal_stan_code,
                data = list(alpha = 10^(-3), beta = 10^(-3),
                            m = 0, s_squared = 10^(3),
                            n = nrow(galaxy),
                            y = galaxy$k_speed,
                            grid_max = grid_max, grid_min = grid_min, grid_length = grid_length),
                chains = 12)
## Summarize only mu, sigma, and the log posterior
print(normal_stan_fit, pars = c("mu","sigma","lp__"))
## Inference for Stan model: 588758d84b4b479190683080904cb88c.
## 12 chains, each with iter=2000; warmup=1000; thin=1; 
## post-warmup draws per chain=1000, total post-warmup draws=12000.
## 
##          mean se_mean   sd    2.5%     25%     50%     75%   97.5% n_eff Rhat
## mu      20.83    0.01 0.52   19.81   20.48   20.83   21.17   21.85 10130    1
## sigma    4.61    0.00 0.36    3.97    4.36    4.59    4.84    5.39  9190    1
## lp__  -166.29    0.01 0.99 -168.93 -166.70 -165.99 -165.57 -165.31  5240    1
## 
## Samples were drawn using NUTS(diag_e) at Sat Apr 27 06:22:44 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).

The sampling process seems ok. Plot density function draws.

## Overlay the per-chain posterior mean density on the observed data
plot_draws(normal_stan_fit)

Apparently, a single normal distribution model is not a sufficient description of the density.

Finite Mixture of Normals

Now we examine whether modeling the distribution as a mixture of several underlying cluster-specific normals fits the data better. Here we assume some fixed number of clusters \(H\).

Model

Let \(z_{i} \in \left\{ 1, \dots, H \right\}\) be the cluster membership latent variable and \(\mathbf{z}_{i} = (I(z_{i}=1),\dots,I(z_{i}=H))^{T}\) be the vector indicator version. We have the following model.

\[\begin{align*} y_{i} | (z_{i}, \mu_{1},\dots,\mu_{H}, \sigma^{2}_{1},\dots,\sigma^{2}_{H}) &\sim N(\mu_{z_{i}}, \sigma^{2}_{z_{i}})\\ \mathbf{z}_{i} | \boldsymbol{\pi} &\sim Multinomial(1, \boldsymbol{\pi})\\ \\ p(\mu_{h},\sigma^{2}_{h}) &= p(\mu_{h}|\sigma^{2}_{h})p(\sigma^{2}_{h})\\ &= N(\mu_{h} | m, s^{2}) Inverse-Gamma(\sigma^{2}_{h} | \alpha, \beta)\\ &= \left[ \frac{1}{\sqrt{2\pi s^{2}}} \exp \left( - \frac{(\mu_{h} - m)^{2}}{2 \times s^{2}} \right) \right] \left[ \frac{\beta^{\alpha}}{\Gamma(\alpha)} (\sigma^{2}_{h})^{-\alpha - 1} \exp \left( - \frac{\beta}{\sigma^{2}_{h}} \right) \right]\\ &\text{where}\\ m &= 0\\ s^{2} &= 10^{3}\\ \alpha &= 10^{-3}\\ \beta &= 10^{-3}\\ \\ p(\boldsymbol{\pi}) &\sim Dirichlet \left( \frac{\alpha}{H}, \dots, \frac{\alpha}{H} \right)\\ \end{align*} \]

Summing out the latent categorical variable \(z_{i}\) results in the following (conditioning on parameters suppressed for simplicity) marginal density. Note the cluster membership latent variable \(z_i\) is not measured; thus, it is a discrete parameter. Because Stan’s Hamiltonian Monte Carlo (HMC) cannot deal with discrete parameters, this marginalization step is required for Stan. JAGS seems to allow a discrete parameter and accepts the original model above.

\[\begin{align*} p(y) &= \sum^{H}_{z=1} p(y|z) p(z)\\ &= \sum^{H}_{z=1} \pi_{z} p(y|z)\\ &= \sum^{H}_{h=1} \pi_h N(y | \mu_{h}, \sigma^{2}_{h})\\ \end{align*} \]

Another layer of complexity is the label switching issue. That is, the cluster IDs \(\left\{ 1, \dots, H \right\}\) do not specify which cluster corresponds to which ID when all the cluster-specific priors for the normal distribution \(p(\mu_{h},\sigma^{2}_{h})\) are the same. The solution is to somehow make the cluster IDs distinguishable. Ideally, this should be done on a substantive basis, setting a different prior for each hypothesized latent cluster. In the one-dimensional case we have here, a common solution seems to be to constrain \(\mu_{1} \le \mu_{2} \le \dots \le \mu_{H}\).

Implementation

## Read the finite-mixture Stan program (cluster means are NOT ordered in
## this version, hence the "_unordered" suffix) and display it.
normal_fixed_mixture_stan_code <- biostan::read_stan_file("./bayesianideas_density_normal_fixed_mixture_unordered.stan")
biostan::print_stan_code(normal_fixed_mixture_stan_code, section = NULL)
## data { 
##     // Hyperparameters 
##     real alpha; 
##     real beta; 
##     real m; 
##     real s_squared; 
##     real<lower=0> dirichlet_alpha; 
##  
##     // Define variables in data 
##     // Number of observations (an integer) 
##     int<lower=0> n; 
##     // Outcome (a real vector of length n) 
##     real y[n]; 
##     // Number of latent clusters 
##     int<lower=1> H; 
##  
##     // Grid evaluation 
##     real grid_max; 
##     real grid_min; 
##     int<lower=1> grid_length; 
## } 
##  
## transformed data { 
##     real s; 
##     real grid_step; 
##  
##     s = sqrt(s_squared); 
##     grid_step = (grid_max - grid_min) / (grid_length - 1); 
## } 
##  
## parameters { 
##     // Define parameters to estimate 
##     // Population mean (a real number) 
##     vector[H] mu; 
##     // Population variance (a positive real number) 
##     real<lower=0> sigma_squared[H]; 
##     // Cluster probability 
##     simplex[H] Pi; 
## } 
##  
## transformed parameters { 
##     // Population standard deviation (a positive real number) 
##     real<lower=0> sigma[H]; 
##     // Standard deviation (derived from variance) 
##     sigma = sqrt(sigma_squared); 
## } 
##  
## model { 
##     // Temporary vector for loop use. Need to come first before priors. 
##     real contributions[H]; 
##  
##     // Prior part of Bayesian inference 
##     // All vectorized 
##     // Mean 
##     mu ~ normal(m, s); 
##     // sigma^2 has inverse gamma (alpha = 1, beta = 1) prior 
##     sigma_squared ~ inv_gamma(alpha, beta); 
##     // cluster probability vector 
##     Pi ~ dirichlet(rep_vector(dirichlet_alpha / H, H)); 
##  
##     // Likelihood part of Bayesian inference 
##     // Outcome model N(mu, sigma^2) (use SD rather than Var) 
##     for (i in 1:n) { 
##         // Loop over individuals 
##  
##           for (h in 1:H) { 
##               // Loop over clusters within each individual 
##               // Log likelihood contributions log(Pi[h] * N(y[i] | mu[h],sigma[h])) 
##               contributions[h] = log(Pi[h]) + normal_lpdf(y[i] | mu[h], sigma[h]); 
##           } 
##  
##           // log(sum(exp(contribution element))) 
##           target += log_sum_exp(contributions); 
##  
##     } 
## } 
##  
## generated quantities { 
##  
##     real log_f[grid_length]; 
##  
##     for (g in 1:grid_length) { 
##         // Definiting here avoids reporting of these intermediates. 
##         real contributions[H]; 
##         real grid_value; 
##  
##         grid_value = grid_min + grid_step * (g - 1); 
##         for (h in 1:H) { 
##             contributions[h] = log(Pi[h]) + normal_lpdf(grid_value | mu[h], sigma[h]); 
##         } 
##  
##         log_f[g] = log_sum_exp(contributions); 
##     } 
##  
## } 
## 

Fitting

6 latent cluster model

Let us try a 6 latent cluster model with otherwise similar vague priors except Dirichlet(5,…,5) to avoid cluster degeneration.

## Fit with H = 6 latent clusters. dirichlet_alpha = 6*5 yields a
## Dirichlet(5,...,5) prior because the Stan program divides
## dirichlet_alpha by H for each component.
normal_fixed_mixture_stan_fit6 <-
    rstan::stan(model_code = normal_fixed_mixture_stan_code,
                data = list(alpha = 10^(-3), beta = 10^(-3),
                            m = 0, s_squared = 10^(3),
                            n = nrow(galaxy),
                            y = galaxy$k_speed,
                            dirichlet_alpha = 6*5,
                            H = 6,
                            grid_max = grid_max, grid_min = grid_min, grid_length = grid_length),
                chains = 12)
## Warning: There were 1425 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## Warning: There were 3441 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## http://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
## Warning: Examine the pairs() plot to diagnose sampling problems
## Summarize divergence, tree-depth, and energy diagnostics for the 6-cluster fit
check_hmc_diagnostics(normal_fixed_mixture_stan_fit6)
## 
## Divergences:
## 
## Tree depth:
## 
## Energy:
## Posterior summaries for the mixture parameters
print_relevant_pars(normal_fixed_mixture_stan_fit6)
## Inference for Stan model: 68985fd05a725a6fbd3d2ee07905d347.
## 12 chains, each with iter=2000; warmup=1000; thin=1; 
## post-warmup draws per chain=1000, total post-warmup draws=12000.
## 
##                            mean       se_mean            sd    2.5%     25%     50%     75%          97.5% n_eff
## mu[1]              2.040000e+01  1.380000e+00  9.370000e+00    6.48   19.78   22.00   23.16   3.239000e+01    46
## mu[2]              1.588000e+01  3.070000e+00  1.931000e+01  -47.72   10.00   20.46   23.79   3.373000e+01    40
## mu[3]              1.824000e+01  1.960000e+00  8.330000e+00    8.88    9.96   20.19   22.81   3.163000e+01    18
## mu[4]              1.840000e+01  1.050000e+00  1.248000e+01   -5.86    9.92   19.95   22.86   3.282000e+01   140
## mu[5]              1.948000e+01  2.950000e+00  1.161000e+01  -15.78   19.71   21.57   23.60   3.299000e+01    16
## mu[6]              1.863000e+01  1.830000e+00  8.900000e+00    6.77   16.16   20.05   22.74   3.057000e+01    24
## sigma_squared[1]  2.416667e+299           NaN           Inf    0.08    0.41    1.55    8.43   7.270808e+93   NaN
## sigma_squared[2]  2.373119e+303           NaN           Inf    0.02    0.38    2.06   36.77  2.189092e+234   NaN
## sigma_squared[3]   8.516214e+41  8.503134e+41  9.317753e+43    0.02    0.27    0.85    3.69   8.547000e+01 12008
## sigma_squared[4]  3.410077e+285           NaN           Inf    0.07    0.33    0.88    6.03   3.374356e+57   NaN
## sigma_squared[5]  3.286989e+304           NaN           Inf    0.08    0.36    1.76    9.56  4.130435e+193   NaN
## sigma_squared[6]  9.329738e+298           NaN           Inf    0.00    0.20    0.59    3.64   8.381711e+98   NaN
## Pi[1]              1.900000e-01  1.000000e-02  9.000000e-02    0.04    0.13    0.18    0.26   3.700000e-01    91
## Pi[2]              1.300000e-01  2.000000e-02  9.000000e-02    0.02    0.07    0.12    0.18   3.400000e-01    24
## Pi[3]              1.700000e-01  3.000000e-02  1.000000e-01    0.04    0.10    0.14    0.24   3.600000e-01    14
## Pi[4]              1.800000e-01  3.000000e-02  1.000000e-01    0.04    0.10    0.15    0.27   3.700000e-01    13
## Pi[5]              1.600000e-01  2.000000e-02  1.000000e-01    0.03    0.08    0.14    0.22   3.800000e-01    20
## Pi[6]              1.600000e-01  2.000000e-02  9.000000e-02    0.04    0.08    0.14    0.22   3.500000e-01    14
## sigma[1]          4.766986e+147 4.489461e+147 4.915935e+149    0.29    0.64    1.25    2.90   2.182751e+46 11990
## sigma[2]          9.534330e+149           NaN 4.870737e+151    0.15    0.62    1.43    6.06  1.478345e+117   NaN
## sigma[3]           8.944962e+18  8.634780e+18  9.228285e+20    0.14    0.52    0.92    1.92   9.240000e+00 11422
## sigma[4]          5.330830e+140 5.325445e+140 5.839587e+142    0.27    0.58    0.94    2.46   5.781632e+28 12024
## sigma[5]          5.543089e+150           NaN 1.812233e+152    0.29    0.60    1.33    3.09   4.853280e+96   NaN
## sigma[6]          2.802587e+147 2.786289e+147 3.054461e+149    0.04    0.45    0.77    1.91   2.869078e+49 12018
## lp__              -2.679500e+02  1.070000e+00  4.840000e+00 -276.97 -271.73 -268.10 -263.97  -2.597000e+02    21
##                  Rhat
## mu[1]            1.15
## mu[2]            1.18
## mu[3]            1.75
## mu[4]            1.13
## mu[5]            1.35
## mu[6]            1.37
## sigma_squared[1]  NaN
## sigma_squared[2]  NaN
## sigma_squared[3] 1.00
## sigma_squared[4]  NaN
## sigma_squared[5]  NaN
## sigma_squared[6]  NaN
## Pi[1]            1.16
## Pi[2]            1.25
## Pi[3]            1.47
## Pi[4]            1.50
## Pi[5]            1.31
## Pi[6]            1.34
## sigma[1]         1.00
## sigma[2]         1.00
## sigma[3]         1.00
## sigma[4]         1.00
## sigma[5]         1.00
## sigma[6]         1.00
## lp__             1.29
## 
## Samples were drawn using NUTS(diag_e) at Sat Apr 27 06:24:46 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).
## Trace plots per parameter (poor mixing is visible across chains)
traceplot_all(normal_fixed_mixture_stan_fit6)

## Pairs plots; divergent transitions appear as red points
pairs_plot_all(normal_fixed_mixture_stan_fit6)

## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'

## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'

## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'

## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range

## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'

## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'

## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'

## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'

## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'

## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected

## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in par(usr = c(usr[1:2], 0, 1.5)): Internal(pretty()): very large range.. corrected
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'

## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'

## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'

## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range

## Per-chain posterior mean densities for the 6-cluster fit
plot_draws(normal_fixed_mixture_stan_fit6)

There were many divergent transitions (red dots in the pairs plots). The cross appearance of the mu pairs plot likely indicates that once the cluster has essentially zero probability, any value of mu is allowed. Interestingly, the resulting density estimates are quite similar.

1 latent cluster model

Let us sanity check with just one cluster.

## Sanity check: H = 1 cluster should reproduce the single-normal fit,
## with Pi[1] degenerate at 1.
normal_fixed_mixture_stan_fit1 <-
    rstan::stan(model_code = normal_fixed_mixture_stan_code,
                data = list(alpha = 10^(-3), beta = 10^(-3),
                            m = 0, s_squared = 10^(3),
                            n = nrow(galaxy),
                            y = galaxy$k_speed,
                            dirichlet_alpha = 1,
                            H = 1,
                            grid_max = grid_max, grid_min = grid_min, grid_length = grid_length),
                chains = 12)
print_relevant_pars(normal_fixed_mixture_stan_fit1)
## Inference for Stan model: 68985fd05a725a6fbd3d2ee07905d347.
## 12 chains, each with iter=2000; warmup=1000; thin=1; 
## post-warmup draws per chain=1000, total post-warmup draws=12000.
## 
##                     mean se_mean   sd    2.5%     25%     50%     75%   97.5% n_eff Rhat
## mu[1]              20.84    0.00 0.51   19.85   20.49   20.84   21.18   21.83 10827    1
## sigma_squared[1]   21.37    0.04 3.45   15.60   18.90   21.05   23.48   28.93  8799    1
## Pi[1]               1.00     NaN 0.00    1.00    1.00    1.00    1.00    1.00   NaN  NaN
## sigma[1]            4.61    0.00 0.37    3.95    4.35    4.59    4.85    5.38  8957    1
## lp__             -241.65    0.01 1.01 -244.42 -242.03 -241.34 -240.94 -240.66  5256    1
## 
## Samples were drawn using NUTS(diag_e) at Sat Apr 27 06:25:38 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).
## Trace plots for the degenerate one-cluster model
traceplot_all(normal_fixed_mixture_stan_fit1)

## Density draws; should match the single-normal fit
plot_draws(normal_fixed_mixture_stan_fit1)

This gave a similar result to the first single normal fit, except for the additional \(\pi\) parameter, which can only be 1 in this case.

2 latent cluster model

This model has two latent clusters.

## Fit with H = 2 clusters; dirichlet_alpha = 2*5 again gives a
## Dirichlet(5, 5) prior after the in-program division by H.
normal_fixed_mixture_stan_fit2 <-
    rstan::stan(model_code = normal_fixed_mixture_stan_code,
                data = list(alpha = 10^(-3), beta = 10^(-3),
                            m = 0, s_squared = 10^(3),
                            n = nrow(galaxy),
                            y = galaxy$k_speed,
                            dirichlet_alpha = 2*5,
                            H = 2,
                            grid_max = grid_max, grid_min = grid_min, grid_length = grid_length),
                chains = 12)
print_relevant_pars(normal_fixed_mixture_stan_fit2)
## Inference for Stan model: 68985fd05a725a6fbd3d2ee07905d347.
## 12 chains, each with iter=2000; warmup=1000; thin=1; 
## post-warmup draws per chain=1000, total post-warmup draws=12000.
## 
##                     mean se_mean    sd    2.5%     25%     50%     75%   97.5% n_eff Rhat
## mu[1]              20.50    0.45  1.79   16.09   19.50   21.18   21.69   22.63    16 1.29
## mu[2]              17.94    1.97  4.93    9.47   10.49   20.95   21.39   22.01     6 4.81
## sigma_squared[1]   40.02   13.58 39.91    2.56    6.72   21.86   65.67  131.81     9 1.82
## sigma_squared[2]   20.57   12.79 35.64    0.11    1.69    3.52   11.83  113.58     8 2.13
## Pi[1]               0.55    0.10  0.25    0.20    0.31    0.52    0.81    0.91     6 3.84
## Pi[2]               0.45    0.10  0.25    0.09    0.19    0.48    0.69    0.80     6 3.84
## sigma[1]            5.47    1.21  3.18    1.60    2.59    4.67    8.10   11.48     7 2.72
## sigma[2]            3.19    1.27  3.22    0.34    1.30    1.88    3.34   10.66     6 3.69
## lp__             -232.21    0.62  2.25 -237.45 -233.70 -231.72 -230.39 -229.20    13 1.37
## 
## Samples were drawn using NUTS(diag_e) at Sat Apr 27 06:25:57 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).
## Trace plots for the two-cluster model
traceplot_all(normal_fixed_mixture_stan_fit2)

## Pairs plots to inspect posterior geometry
pairs_plot_all(normal_fixed_mixture_stan_fit2)

## Per-chain posterior mean densities
plot_draws(normal_fixed_mixture_stan_fit2)

No divergent transitions were observed with just two clusters. However, Rhat statistics are high and mixing is poor including lp__.

3 latent cluster model

## Fit with H = 3 clusters; dirichlet_alpha = 3*5 gives Dirichlet(5, 5, 5).
normal_fixed_mixture_stan_fit3 <-
    rstan::stan(model_code = normal_fixed_mixture_stan_code,
                data = list(alpha = 10^(-3), beta = 10^(-3),
                            m = 0, s_squared = 10^(3),
                            n = nrow(galaxy),
                            y = galaxy$k_speed,
                            dirichlet_alpha = 3*5,
                            H = 3,
                            grid_max = grid_max, grid_min = grid_min, grid_length = grid_length),
                chains = 12)
## Warning: There were 16 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## Warning: There were 908 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## http://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
## Warning: Examine the pairs() plot to diagnose sampling problems
print_relevant_pars(normal_fixed_mixture_stan_fit3)
## Inference for Stan model: 68985fd05a725a6fbd3d2ee07905d347.
## 12 chains, each with iter=2000; warmup=1000; thin=1; 
## post-warmup draws per chain=1000, total post-warmup draws=12000.
## 
##                     mean se_mean    sd    2.5%     25%     50%     75%   97.5% n_eff Rhat
## mu[1]              17.84    2.44  6.13    9.42    9.83   20.94   21.47   27.78     6 4.54
## mu[2]              20.98    2.26  5.95    9.51   19.88   21.32   24.21   32.82     7 2.76
## mu[3]              17.85    2.87  7.25    9.40    9.75   21.12   23.22   30.82     6 4.18
## sigma_squared[1]   14.20    9.30 29.20    0.11    0.36    3.45    7.10   93.55    10 1.62
## sigma_squared[2]   15.05    6.17 26.85    0.12    0.55    4.02   21.93   75.96    19 1.23
## sigma_squared[3]   12.00    6.03 24.35    0.09    0.27    2.47   16.74   69.54    16 1.31
## Pi[1]               0.38    0.10  0.26    0.07    0.13    0.28    0.66    0.80     7 3.29
## Pi[2]               0.34    0.08  0.23    0.07    0.15    0.26    0.58    0.78     8 2.49
## Pi[3]               0.28    0.08  0.22    0.07    0.12    0.17    0.38    0.77     7 2.73
## sigma[1]            2.63    1.01  2.69    0.33    0.60    1.86    2.66    9.67     7 2.62
## sigma[2]            2.95    0.84  2.52    0.35    0.74    2.00    4.68    8.72     9 1.77
## sigma[3]            2.43    0.87  2.46    0.31    0.52    1.57    4.09    8.34     8 2.08
## lp__             -233.58    0.04  2.32 -238.98 -234.89 -233.21 -231.87 -230.17  2679 1.01
## 
## Samples were drawn using NUTS(diag_e) at Sat Apr 27 06:26:34 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).
# Diagnostics for the 3-cluster fit.
traceplot_all(normal_fixed_mixture_stan_fit3)

pairs_plot_all(normal_fixed_mixture_stan_fit3)

plot_draws(normal_fixed_mixture_stan_fit3)

Interestingly, lp__ has an Rhat of essentially 1.00. However, the actual density estimates took on two shapes: one with two separate peaks, and the other with the two peaks joined together.

4 latent cluster model

# Fit the finite normal mixture with H = 4 latent clusters.
# Only dirichlet_alpha and H change relative to the 3-cluster fit.
normal_fixed_mixture_stan_fit4 <- local({
    stan_data <- list(alpha = 10^(-3), beta = 10^(-3),
                      m = 0, s_squared = 10^(3),
                      n = nrow(galaxy),
                      y = galaxy$k_speed,
                      dirichlet_alpha = 4*5,
                      H = 4,
                      grid_max = grid_max, grid_min = grid_min,
                      grid_length = grid_length)
    rstan::stan(model_code = normal_fixed_mixture_stan_code,
                data = stan_data,
                chains = 12)
})
## Warning: There were 123 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## Warning: There were 927 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## http://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
## Warning: Examine the pairs() plot to diagnose sampling problems
print_relevant_pars(normal_fixed_mixture_stan_fit4)
## Inference for Stan model: 68985fd05a725a6fbd3d2ee07905d347.
## 12 chains, each with iter=2000; warmup=1000; thin=1; 
## post-warmup draws per chain=1000, total post-warmup draws=12000.
## 
##                     mean se_mean    sd    2.5%     25%     50%     75%   97.5% n_eff Rhat
## mu[1]              20.45    2.08  5.41    9.62   19.69   22.05   23.49   28.51     7 3.12
## mu[2]              21.15    1.50  3.93    9.58   19.95   22.26   22.93   26.52     7 2.98
## mu[3]              15.89    2.56  6.35    9.36    9.70   10.89   22.45   24.28     6 7.72
## mu[4]              19.37    2.40  6.08    9.45   14.16   20.01   23.62   26.90     6 4.56
## sigma_squared[1]   18.24    8.36 33.75    0.14    0.43    1.75   30.01   88.13    16 1.27
## sigma_squared[2]    8.75    6.30 21.65    0.14    0.58    1.62    4.56   64.82    12 1.46
## sigma_squared[3]    2.82    2.63 12.34    0.00    0.17    0.33    0.83   34.56    22 1.31
## sigma_squared[4]   14.02    7.04 24.43    0.10    0.31    0.78   21.12   73.68    12 1.50
## Pi[1]               0.24    0.04  0.11    0.08    0.15    0.23    0.33    0.48    10 1.63
## Pi[2]               0.34    0.06  0.16    0.09    0.23    0.32    0.40    0.74     8 2.18
## Pi[3]               0.20    0.05  0.13    0.03    0.10    0.14    0.30    0.49     8 2.16
## Pi[4]               0.22    0.04  0.11    0.07    0.13    0.20    0.31    0.45     9 1.74
## sigma[1]            3.07    1.08  2.96    0.38    0.66    1.32    5.48    9.39     8 2.21
## sigma[2]            2.04    0.78  2.15    0.37    0.76    1.27    2.14    8.05     8 2.26
## sigma[3]            0.97    0.40  1.37    0.05    0.41    0.57    0.91    5.88    12 1.92
## sigma[4]            2.59    0.96  2.70    0.32    0.56    0.88    4.60    8.58     8 2.45
## lp__             -237.94    1.43  4.45 -249.49 -239.43 -236.82 -234.91 -232.51    10 1.64
## 
## Samples were drawn using NUTS(diag_e) at Sat Apr 27 06:27:35 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).
# Diagnostics for the 4-cluster fit.
traceplot_all(normal_fixed_mixture_stan_fit4)

pairs_plot_all(normal_fixed_mixture_stan_fit4)

## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'

## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'

## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'

## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'

## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'

## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'

## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'

## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'

plot_draws(normal_fixed_mixture_stan_fit4)

# Interactive exploration: launches a Shiny app and blocks the R session
# until closed, so this line is only meaningful when run interactively.
shinystan::launch_shinystan(normal_fixed_mixture_stan_fit4)

5 latent cluster model

# Fit the finite normal mixture with H = 5 latent clusters.
# The same 5-per-cluster Dirichlet scaling is kept (dirichlet_alpha = 5 * 5).
normal_fixed_mixture_stan_fit5 <- local({
    stan_data <- list(alpha = 10^(-3), beta = 10^(-3),
                      m = 0, s_squared = 10^(3),
                      n = nrow(galaxy),
                      y = galaxy$k_speed,
                      dirichlet_alpha = 5*5,
                      H = 5,
                      grid_max = grid_max, grid_min = grid_min,
                      grid_length = grid_length)
    rstan::stan(model_code = normal_fixed_mixture_stan_code,
                data = stan_data,
                chains = 12)
})
## Warning: There were 3185 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## Warning: There were 944 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## http://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
## Warning: Examine the pairs() plot to diagnose sampling problems
print_relevant_pars(normal_fixed_mixture_stan_fit5)
## Inference for Stan model: 68985fd05a725a6fbd3d2ee07905d347.
## 12 chains, each with iter=2000; warmup=1000; thin=1; 
## post-warmup draws per chain=1000, total post-warmup draws=12000.
## 
##                            mean se_mean            sd    2.5%     25%     50%     75%          97.5% n_eff Rhat
## mu[1]              1.658000e+01    2.63  1.245000e+01  -17.95    9.70   20.51   23.38   3.223000e+01    22 1.25
## mu[2]              1.634000e+01    3.05  1.827000e+01  -40.76   19.04   21.96   23.08   3.730000e+01    36 1.17
## mu[3]              2.030000e+01    1.67  9.080000e+00    4.66   19.81   22.23   23.11   3.174000e+01    30 1.27
## mu[4]              1.733000e+01    2.22  5.720000e+00    9.42    9.85   19.76   20.46   2.580000e+01     7 3.50
## mu[5]              1.834000e+01    2.50  1.300000e+01  -23.57   19.33   21.24   23.44   3.311000e+01    27 1.21
## sigma_squared[1]  2.220715e+304     NaN           Inf    0.10    0.33    2.05   36.03  3.327886e+167   NaN  NaN
## sigma_squared[2]  1.219606e+303     NaN           Inf    0.08    0.61    2.73   41.14  1.170732e+235   NaN  NaN
## sigma_squared[3]  1.111182e+304     NaN           Inf    0.10    0.48    1.50    8.56  2.728439e+110   NaN  NaN
## sigma_squared[4]   5.720000e+00    2.24  1.129700e+02    0.07    0.24    0.42    0.95   4.036000e+01  2553 1.01
## sigma_squared[5]  1.184024e+304     NaN           Inf    0.11    0.42    2.08   34.60  1.858845e+189   NaN  NaN
## Pi[1]              1.600000e-01    0.03  1.000000e-01    0.03    0.10    0.14    0.22   3.900000e-01    14 1.45
## Pi[2]              1.900000e-01    0.04  1.200000e-01    0.03    0.08    0.18    0.29   4.300000e-01    11 1.62
## Pi[3]              2.300000e-01    0.03  1.200000e-01    0.04    0.14    0.23    0.32   4.800000e-01    12 1.46
## Pi[4]              2.200000e-01    0.03  1.100000e-01    0.06    0.12    0.21    0.30   4.300000e-01    10 1.61
## Pi[5]              1.900000e-01    0.03  1.000000e-01    0.03    0.11    0.17    0.27   4.100000e-01    14 1.40
## sigma[1]          3.497831e+150     NaN 1.489858e+152    0.31    0.58    1.43    6.00   5.710081e+83   NaN 1.00
## sigma[2]          6.982520e+149     NaN 3.491733e+151    0.29    0.78    1.65    6.41  3.421521e+117   NaN 1.00
## sigma[3]          2.519498e+150     NaN 1.053869e+152    0.32    0.69    1.22    2.93   1.648000e+55   NaN 1.01
## sigma[4]           1.170000e+00    0.28  2.080000e+00    0.27    0.49    0.65    0.97   6.350000e+00    57 1.13
## sigma[5]          2.543309e+150     NaN 1.087877e+152    0.34    0.65    1.44    5.88   4.139767e+94   NaN 1.01
## lp__              -2.538000e+02    1.27  5.130000e+00 -263.93 -257.68 -253.62 -249.37  -2.458100e+02    16 1.47
## 
## Samples were drawn using NUTS(diag_e) at Sat Apr 27 06:28:57 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).
# Diagnostics for the 5-cluster fit.
traceplot_all(normal_fixed_mixture_stan_fit5)

pairs_plot_all(normal_fixed_mixture_stan_fit5)

## Warning in plot.window(...): Internal(pretty()): very large range.. corrected

## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in par(usr = c(usr[1:2], 0, 1.5)): Internal(pretty()): very large range.. corrected
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range

## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range

## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range

## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'

## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range

## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range

## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range

## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'

## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'

## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'

## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'

## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'

plot_draws(normal_fixed_mixture_stan_fit5)

The Rhat for lp__ increased relative to the 3-cluster model, and the resulting density estimates appear somewhat different from one another.

4 latent cluster model with ordered prior on mu

# Read the Stan model that places an ordered[H] constraint on mu
# (requires jburos/biostan; see the commented install at the top of the post).
# NOTE(review): the file path has no "_ordered_mu" suffix even though the
# echoed model below uses ordered[H] mu — confirm this is the intended file.
normal_fixed_mixture_ordered_mu_stan_code <- biostan::read_stan_file("./bayesianideas_density_normal_fixed_mixture.stan")
biostan::print_stan_code(normal_fixed_mixture_ordered_mu_stan_code, section = NULL)
## data { 
##     // Hyperparameters 
##     real alpha; 
##     real beta; 
##     real m; 
##     real s_squared; 
##     real<lower=0> dirichlet_alpha; 
##  
##     // Define variables in data 
##     // Number of observations (an integer) 
##     int<lower=0> n; 
##     // Outcome (a real vector of length n) 
##     real y[n]; 
##     // Number of latent clusters 
##     int<lower=1> H; 
##  
##     // Grid evaluation 
##     real grid_max; 
##     real grid_min; 
##     int<lower=1> grid_length; 
## } 
##  
## transformed data { 
##     real s; 
##     real grid_step; 
##  
##     s = sqrt(s_squared); 
##     grid_step = (grid_max - grid_min) / (grid_length - 1); 
## } 
##  
## parameters { 
##     // Define parameters to estimate 
##     // Population mean (a real number) 
##     ordered[H] mu; 
##     // Population variance (a positive real number) 
##     real<lower=0> sigma_squared[H]; 
##     // Cluster probability 
##     simplex[H] Pi; 
## } 
##  
## transformed parameters { 
##     // Population standard deviation (a positive real number) 
##     real<lower=0> sigma[H]; 
##     // Standard deviation (derived from variance) 
##     sigma = sqrt(sigma_squared); 
## } 
##  
## model { 
##     // Temporary vector for loop use. Need to come first before priors. 
##     real contributions[H]; 
##  
##     // Prior part of Bayesian inference 
##     // All vectorized 
##     // Mean 
##     mu ~ normal(m, s); 
##     // sigma^2 has an inverse gamma (alpha, beta) prior (here alpha = beta = 10^-3) 
##     sigma_squared ~ inv_gamma(alpha, beta); 
##     // cluster probability vector 
##     Pi ~ dirichlet(rep_vector(dirichlet_alpha / H, H)); 
##  
##     // Likelihood part of Bayesian inference 
##     // Outcome model N(mu, sigma^2) (use SD rather than Var) 
##     for (i in 1:n) { 
##         // Loop over individuals 
##  
##           for (h in 1:H) { 
##               // Loop over clusters within each individual 
##               // Log likelihood contributions log(Pi[h] * N(y[i] | mu[h],sigma[h])) 
##               contributions[h] = log(Pi[h]) + normal_lpdf(y[i] | mu[h], sigma[h]); 
##           } 
##  
##           // log(sum(exp(contribution element))) 
##           target += log_sum_exp(contributions); 
##  
##     } 
## } 
##  
## generated quantities { 
##  
##     real log_f[grid_length]; 
##  
##     for (g in 1:grid_length) { 
##         // Defining here avoids reporting of these intermediates. 
##         real contributions[H]; 
##         real grid_value; 
##  
##         grid_value = grid_min + grid_step * (g - 1); 
##         for (h in 1:H) { 
##             contributions[h] = log(Pi[h]) + normal_lpdf(grid_value | mu[h], sigma[h]); 
##         } 
##  
##         log_f[g] = log_sum_exp(contributions); 
##     } 
##  
## } 
## 
# Fit the 4-cluster mixture using the model with an ordered prior on mu.
# Data are identical to the unordered 4-cluster fit; only the model differs.
normal_fixed_mixture_ordered_mu_stan_fit4 <- local({
    stan_data <- list(alpha = 10^(-3), beta = 10^(-3),
                      m = 0, s_squared = 10^(3),
                      n = nrow(galaxy),
                      y = galaxy$k_speed,
                      dirichlet_alpha = 4*5,
                      H = 4,
                      grid_max = grid_max, grid_min = grid_min,
                      grid_length = grid_length)
    rstan::stan(model_code = normal_fixed_mixture_ordered_mu_stan_code,
                data = stan_data,
                chains = 12)
})
## Warning: There were 5 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## Warning: There were 171 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## http://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
## Warning: Examine the pairs() plot to diagnose sampling problems
print_relevant_pars(normal_fixed_mixture_ordered_mu_stan_fit4)
## Inference for Stan model: 1a37d3991c5cd122dafa78b99a6fc985.
## 12 chains, each with iter=2000; warmup=1000; thin=1; 
## post-warmup draws per chain=1000, total post-warmup draws=12000.
## 
##                     mean se_mean       sd    2.5%     25%     50%     75%   97.5% n_eff Rhat
## mu[1]               9.31    0.49     1.56    1.78    9.51    9.67    9.81   10.14    10 2.05
## mu[2]              18.74    1.11     2.82    9.68   19.46   19.72   19.87   20.19     6 3.94
## mu[3]              21.56    0.46     1.31   19.55   19.97   22.06   22.69   23.27     8 2.16
## mu[4]              23.94    0.41     2.01   21.95   22.75   23.21   24.59   29.65    24 1.22
## sigma_squared[1]  314.75  237.65 19445.28    0.06    0.16    0.24    0.43 1061.11  6695 1.00
## sigma_squared[2]   22.67   15.15    94.70    0.12    0.32    0.48    1.25  148.65    39 1.11
## sigma_squared[3]    9.83    6.31    21.38    0.21    0.57    1.53    4.20   66.13    11 1.57
## sigma_squared[4]   22.04    6.96    39.62    0.69    1.81    5.68   33.09   92.60    32 1.15
## Pi[1]               0.11    0.00     0.03    0.06    0.09    0.11    0.14    0.19  2646 1.01
## Pi[2]               0.27    0.03     0.10    0.09    0.19    0.28    0.35    0.45    11 1.50
## Pi[3]               0.33    0.02     0.10    0.14    0.26    0.32    0.39    0.55    23 1.21
## Pi[4]               0.29    0.04     0.12    0.09    0.19    0.28    0.38    0.54    11 1.56
## sigma[1]            3.11    3.35    17.47    0.25    0.39    0.49    0.65   32.57    27 1.16
## sigma[2]            2.60    1.35     3.99    0.35    0.56    0.69    1.12   12.19     9 2.00
## sigma[3]            2.16    0.80     2.27    0.46    0.76    1.24    2.05    8.13     8 2.36
## sigma[4]            3.76    0.85     2.81    0.83    1.35    2.38    5.75    9.62    11 1.67
## lp__             -235.17    1.13     4.02 -244.19 -237.45 -234.53 -232.19 -229.10    13 1.42
## 
## Samples were drawn using NUTS(diag_e) at Sat Apr 27 06:30:56 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).
# Diagnostics for the ordered-mu 4-cluster fit.
traceplot_all(normal_fixed_mixture_ordered_mu_stan_fit4)

pairs_plot_all(normal_fixed_mixture_ordered_mu_stan_fit4)

## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'

## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'

## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'

## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'

## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'

## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'

## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'

## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'

## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'

plot_draws(normal_fixed_mixture_ordered_mu_stan_fit4)

4 latent cluster model with ordered prior on Pi

# Read the Stan model that orders the mixture weights instead of the means:
# Pi is derived from a positive_ordered lambda via normalization
# (see https://discourse.mc-stan.org/t/ordered-simplex/1835, cited in the code).
normal_fixed_mixture_ordered_Pi_stan_code <- biostan::read_stan_file("./bayesianideas_density_normal_fixed_mixture_ordered_Pi.stan")
biostan::print_stan_code(normal_fixed_mixture_ordered_Pi_stan_code, section = NULL)
## data { 
##     // Hyperparameters 
##     real alpha; 
##     real beta; 
##     real m; 
##     real s_squared; 
##     real<lower=0> dirichlet_alpha; 
##  
##     // Define variables in data 
##     // Number of observations (an integer) 
##     int<lower=0> n; 
##     // Outcome (a real vector of length n) 
##     real y[n]; 
##     // Number of latent clusters 
##     int<lower=1> H; 
##  
##     // Grid evaluation 
##     real grid_max; 
##     real grid_min; 
##     int<lower=1> grid_length; 
## } 
##  
## transformed data { 
##     real s; 
##     real grid_step; 
##  
##     s = sqrt(s_squared); 
##     grid_step = (grid_max - grid_min) / (grid_length - 1); 
## } 
##  
## parameters { 
##     // Define parameters to estimate 
##     // Population mean (a real number) 
##     vector[H] mu; 
##     // Population variance (a positive real number) 
##     real<lower=0> sigma_squared[H]; 
##     // https://discourse.mc-stan.org/t/ordered-simplex/1835 
##     // Unnormalized cluster relative size 
##     positive_ordered[H] lambda; 
## } 
##  
## transformed parameters { 
##     // Population standard deviation (a positive real number) 
##     real<lower=0> sigma[H] = sqrt(sigma_squared); 
##     // Cluster probability 
##     simplex[H] Pi = lambda / sum(lambda); 
##  
##     // Standard deviation (derived from variance) 
##     // sigma = sqrt(sigma_squared); 
##     // Cluster probability 
##     // Pi = lambda / sum(lambda); 
## } 
##  
## model { 
##     // Temporary vector for loop use. Need to come first before priors. 
##     real contributions[H]; 
##  
##     // Prior part of Bayesian inference 
##     // All vectorized 
##     // Mean 
##     mu ~ normal(m, s); 
##     // sigma^2 has an inverse gamma (alpha, beta) prior (here alpha = beta = 10^-3) 
##     sigma_squared ~ inv_gamma(alpha, beta); 
##     // cluster probability vector 
##     /* Pi ~ dirichlet(rep_vector(dirichlet_alpha / H, H)); */ 
##     target += gamma_lpdf(lambda | rep_vector(dirichlet_alpha / H, H), 1); 
##  
##     // Likelihood part of Bayesian inference 
##     // Outcome model N(mu, sigma^2) (use SD rather than Var) 
##     for (i in 1:n) { 
##         // Loop over individuals 
##  
##           for (h in 1:H) { 
##               // Loop over clusters within each individual 
##               // Log likelihood contributions log(Pi[h] * N(y[i] | mu[h],sigma[h])) 
##               contributions[h] = log(Pi[h]) + normal_lpdf(y[i] | mu[h], sigma[h]); 
##           } 
##  
##           // log(sum(exp(contribution element))) 
##           target += log_sum_exp(contributions); 
##  
##     } 
## } 
##  
## generated quantities { 
##  
##     real log_f[grid_length]; 
##  
##     for (g in 1:grid_length) { 
##         // Defining here avoids reporting of these intermediates. 
##         real contributions[H]; 
##         real grid_value; 
##  
##         grid_value = grid_min + grid_step * (g - 1); 
##         for (h in 1:H) { 
##             contributions[h] = log(Pi[h]) + normal_lpdf(grid_value | mu[h], sigma[h]); 
##         } 
##  
##         log_f[g] = log_sum_exp(contributions); 
##     } 
##  
## } 
## 
# Fit the 4-cluster mixture using the model with an ordered prior on Pi.
# Data are identical to the other 4-cluster fits; only the model differs.
normal_fixed_mixture_ordered_Pi_stan_fit4 <- local({
    stan_data <- list(alpha = 10^(-3), beta = 10^(-3),
                      m = 0, s_squared = 10^(3),
                      n = nrow(galaxy),
                      y = galaxy$k_speed,
                      dirichlet_alpha = 4*5,
                      H = 4,
                      grid_max = grid_max, grid_min = grid_min,
                      grid_length = grid_length)
    rstan::stan(model_code = normal_fixed_mixture_ordered_Pi_stan_code,
                data = stan_data,
                chains = 12)
})
## Warning: There were 596 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## Warning: Examine the pairs() plot to diagnose sampling problems
print_relevant_pars(normal_fixed_mixture_ordered_Pi_stan_fit4)
## Inference for Stan model: 8442f443c45aac0def9566d268278405.
## 12 chains, each with iter=2000; warmup=1000; thin=1; 
## post-warmup draws per chain=1000, total post-warmup draws=12000.
## 
##                            mean se_mean            sd    2.5%     25%     50%     75%          97.5% n_eff Rhat
## mu[1]              1.629000e+01    3.02  1.230000e+01  -11.98    9.70   19.44   23.56   3.352000e+01    17 1.32
## mu[2]              1.991000e+01    1.95  5.120000e+00    9.52   19.62   20.00   22.95   2.792000e+01     7 3.12
## mu[3]              1.765000e+01    2.34  5.840000e+00    9.41    9.84   19.81   22.59   2.541000e+01     6 5.93
## mu[4]              2.189000e+01    0.37  1.380000e+00   19.03   21.35   21.97   22.64   2.462000e+01    14 1.43
## sigma_squared[1]  5.589569e+303     NaN           Inf    0.07    0.19    0.41   26.90  1.159897e+169   NaN  NaN
## sigma_squared[2]   9.900000e+00    6.37  2.373000e+01    0.08    0.25    0.54    2.39   6.947000e+01    14 1.40
## sigma_squared[3]   6.000000e+00    4.81  1.383000e+01    0.10    0.26    0.49    1.85   4.562000e+01     8 1.95
## sigma_squared[4]   1.324000e+01    6.86  1.855000e+01    0.42    2.51    4.34   19.33   6.746000e+01     7 2.41
## Pi[1]              1.200000e-01    0.01  4.000000e-02    0.04    0.09    0.11    0.14   1.900000e-01    25 1.17
## Pi[2]              1.900000e-01    0.02  5.000000e-02    0.10    0.15    0.19    0.24   2.900000e-01    11 1.47
## Pi[3]              2.500000e-01    0.02  6.000000e-02    0.14    0.20    0.27    0.30   3.600000e-01     9 1.71
## Pi[4]              4.400000e-01    0.03  1.000000e-01    0.30    0.36    0.41    0.51   6.600000e-01    13 1.39
## sigma[1]          1.115587e+150     NaN 7.475821e+151    0.26    0.44    0.64    5.19   3.405717e+84   NaN 1.00
## sigma[2]           1.930000e+00    0.82  2.480000e+00    0.29    0.50    0.73    1.55   8.330000e+00     9 1.96
## sigma[3]           1.550000e+00    0.73  1.900000e+00    0.32    0.51    0.70    1.36   6.750000e+00     7 3.10
## sigma[4]           2.970000e+00    0.80  2.090000e+00    0.65    1.59    2.08    4.40   8.210000e+00     7 2.91
## lp__              -2.189600e+02    1.93  5.560000e+00 -234.57 -220.95 -217.93 -215.33  -2.114200e+02     8 1.92
## 
## Samples were drawn using NUTS(diag_e) at Sat Apr 27 06:32:22 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).
# Trace plots for the ordered-Pi fit (helper defined earlier in the document);
# used here to inspect between-chain mixing across the 12 chains.
traceplot_all(normal_fixed_mixture_ordered_Pi_stan_fit4)

# Pairwise scatter plots of the posterior draws (helper defined earlier).  The
# KernSmooth binning warnings that follow come from the 2-D density panels.
pairs_plot_all(normal_fixed_mixture_ordered_Pi_stan_fit4)

## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'

## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'

## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'

# Density visualization from the posterior draws of the ordered-Pi fit
# (helper defined earlier; presumably uses the log_f grid evaluated in
# generated quantities — see the Stan code printed below).
plot_draws(normal_fixed_mixture_ordered_Pi_stan_fit4)

4-latent-cluster model with a half-Cauchy prior on sigma

# Load the Stan program for the fixed normal mixture with a half-Cauchy prior
# on sigma, and echo it (section = NULL prints the whole file).  The code is
# shown in the output block that follows.
normal_fixed_mixture_half_cauchy_stan_code <- biostan::read_stan_file("./bayesianideas_density_normal_fixed_mixture_half_cauchy.stan")
biostan::print_stan_code(normal_fixed_mixture_half_cauchy_stan_code, section = NULL)
## data { 
##     // Hyperparameters 
##     // For sigma 
##     // https://en.wikipedia.org/wiki/Cauchy_distribution 
##     real location; 
##     real scale; 
##     // For mu 
##     real m; 
##     real s_squared; 
##     real<lower=0> dirichlet_alpha; 
##  
##     // Define variables in data 
##     // Number of observations (an integer) 
##     int<lower=0> n; 
##     // Outcome (a real vector of length n) 
##     real y[n]; 
##     // Number of latent clusters 
##     int<lower=1> H; 
##  
##     // Grid evaluation 
##     real grid_max; 
##     real grid_min; 
##     int<lower=1> grid_length; 
## } 
##  
## transformed data { 
##     real s; 
##     real grid_step; 
##  
##     s = sqrt(s_squared); 
##     grid_step = (grid_max - grid_min) / (grid_length - 1); 
## } 
##  
## parameters { 
##     // Define parameters to estimate 
##     // Population mean (a real number) 
##     ordered[H] mu; 
##     // Population variance (a positive real number) 
##     real<lower=0> sigma[H]; 
##     // Cluster probability 
##     simplex[H] Pi; 
## } 
##  
## transformed parameters { 
## } 
##  
## model { 
##     // Temporary vector for loop use. Need to come first before priors. 
##     real contributions[H]; 
##  
##     // Prior part of Bayesian inference 
##     // All vectorized 
##     // Mean 
##     mu ~ normal(m, s); 
##     // sigma has a half-Cauchy(location, scale) prior (values supplied via data; <lower=0> truncates to the half line) 
##     sigma ~ cauchy(location, scale); 
##     // cluster probability vector 
##     Pi ~ dirichlet(rep_vector(dirichlet_alpha / H, H)); 
##  
##     // Likelihood part of Bayesian inference 
##     // Outcome model N(mu, sigma^2) (use SD rather than Var) 
##     for (i in 1:n) { 
##         // Loop over individuals 
##  
##           for (h in 1:H) { 
##               // Loop over clusters within each individual 
##               // Log likelihood contributions log(Pi[h] * N(y[i] | mu[h],sigma[h])) 
##               contributions[h] = log(Pi[h]) + normal_lpdf(y[i] | mu[h], sigma[h]); 
##           } 
##  
##           // log(sum(exp(contribution element))) 
##           target += log_sum_exp(contributions); 
##  
##     } 
## } 
##  
## generated quantities { 
##  
##     real log_f[grid_length]; 
##  
##     for (g in 1:grid_length) { 
##         // Defining here avoids reporting of these intermediates. 
##         real contributions[H]; 
##         real grid_value; 
##  
##         grid_value = grid_min + grid_step * (g - 1); 
##         for (h in 1:H) { 
##             contributions[h] = log(Pi[h]) + normal_lpdf(grid_value | mu[h], sigma[h]); 
##         } 
##  
##         log_f[g] = log_sum_exp(contributions); 
##     } 
##  
## } 
## 
# Fit the H = 4 normal mixture, now with a half-Cauchy prior on each sigma
# instead of an inverse-gamma prior on sigma^2.  Data-list entries map to the
# Stan data block printed above:
#   location, scale : Cauchy prior on sigma (here half-Cauchy(0, 2), since
#                     sigma is declared with <lower=0>)
#   m, s_squared    : normal prior on mu, N(0, 10^3)
#   dirichlet_alpha : total Dirichlet concentration; the Stan model divides by
#                     H, so 4*5 = 20 gives alpha/H = 5 per cluster
#   grid_*          : grid over which log_f is evaluated in generated quantities
# 12 chains, rstan defaults otherwise (iter = 2000, warmup = 1000).
normal_fixed_mixture_half_cauchy_stan_fit4 <-
    rstan::stan(model_code = normal_fixed_mixture_half_cauchy_stan_code,
                data = list(location = 0, scale = 2,
                            m = 0, s_squared = 10^(3),
                            n = nrow(galaxy),
                            y = galaxy$k_speed,
                            dirichlet_alpha = 4*5,
                            H = 4,
                            grid_max = grid_max, grid_min = grid_min, grid_length = grid_length),
                chains = 12)
## Warning: There were 5 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## Warning: Examine the pairs() plot to diagnose sampling problems
# rstan HMC diagnostics (divergences, tree depth, energy/E-BFMI) for the
# half-Cauchy fit; the output below lists the three diagnostic categories.
check_hmc_diagnostics(normal_fixed_mixture_half_cauchy_stan_fit4)
## 
## Divergences:
## 
## Tree depth:
## 
## Energy:
# Posterior summary restricted to the mixture parameters and log-posterior.
# NOTE(review): mu[1], Pi[1] and sigma[1] mix well (n_eff > 1000, Rhat ~ 1),
# but the upper clusters still show small n_eff and Rhat up to ~3 — the
# label/mode ambiguity among the overlapping upper components persists.
print_relevant_pars(normal_fixed_mixture_half_cauchy_stan_fit4,
                    pars = c("mu","Pi","sigma","lp__"))
## Inference for Stan model: 487c8d056b9f381c7c5598bef442b3d5.
## 12 chains, each with iter=2000; warmup=1000; thin=1; 
## post-warmup draws per chain=1000, total post-warmup draws=12000.
## 
##             mean se_mean   sd    2.5%     25%     50%     75%   97.5% n_eff Rhat
## mu[1]       9.73    0.01 0.24    9.26    9.58    9.72    9.87   10.22  1125 1.02
## mu[2]      19.73    0.13 0.57   18.36   19.66   19.79   19.93   20.34    20 1.26
## mu[3]      22.22    0.31 0.94   19.74   21.90   22.52   22.86   23.34     9 1.70
## mu[4]      24.66    0.55 2.26   22.19   23.02   23.93   25.70   30.45    17 1.29
## Pi[1]       0.11    0.00 0.03    0.06    0.09    0.11    0.13    0.18  2927 1.01
## Pi[2]       0.32    0.02 0.09    0.14    0.26    0.33    0.38    0.46    18 1.23
## Pi[3]       0.33    0.02 0.10    0.14    0.26    0.32    0.39    0.55    24 1.23
## Pi[4]       0.24    0.03 0.10    0.09    0.16    0.22    0.30    0.47    13 1.44
## sigma[1]    0.58    0.00 0.24    0.30    0.43    0.54    0.67    1.18  2418 1.01
## sigma[2]    1.37    0.89 2.29    0.40    0.60    0.70    0.83    9.36     7 3.22
## sigma[3]    2.42    0.71 2.17    0.60    1.12    1.49    2.33    8.02     9 2.18
## sigma[4]    4.85    1.09 3.09    0.86    1.64    4.96    6.37   11.98     8 2.25
## lp__     -235.81    0.53 3.17 -243.19 -237.55 -235.35 -233.58 -230.82    36 1.12
## 
## Samples were drawn using NUTS(diag_e) at Sat Apr 27 06:33:37 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).
# Trace plots for the half-Cauchy fit, limited to the parameters of interest.
traceplot_all(normal_fixed_mixture_half_cauchy_stan_fit4,
              pars = c("mu","Pi","sigma","lp__"))

# Pairwise posterior plots for the same parameter subset.  NOTE(review): the
# error echoed below ("only one column in the argument to 'pairs'") suggests
# the helper ends up handing pairs() a single column for one of the requested
# parameter groups (lp__ is scalar) — worth fixing in the helper's definition.
pairs_plot_all(normal_fixed_mixture_half_cauchy_stan_fit4,
               pars = c("mu","Pi","sigma","lp__"))

## Error in pairs.default(x = structure(c(-236.856315027416, -233.670884234658, : only one column in the argument to 'pairs'

# Density visualization from the posterior draws of the half-Cauchy fit
# (helper defined earlier in the document).
plot_draws(normal_fixed_mixture_half_cauchy_stan_fit4)


# Record the full R session information (R version, platform, attached and
# loaded packages) for reproducibility.
print(sessionInfo())
## R version 3.5.2 (2018-12-20)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Mojave 10.14.4
## 
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] DPpackage_1.1-7.4  rstan_2.18.2       StanHeaders_2.18.1 forcats_0.3.0      stringr_1.4.0     
##  [6] dplyr_0.8.0        purrr_0.3.0        readr_1.3.1        tidyr_0.8.2        tibble_2.0.1      
## [11] ggplot2_3.1.0      tidyverse_1.2.1    doRNG_1.7.1        rngtools_1.3.1     pkgmaker_0.27     
## [16] registry_0.5       doParallel_1.0.14  iterators_1.0.10   foreach_1.4.4      knitr_1.21        
## 
## loaded via a namespace (and not attached):
##   [1] minqa_1.2.4               colorspace_1.4-0          ggridges_0.5.1            rsconnect_0.8.13         
##   [5] ggstance_0.3.1            markdown_0.9              base64enc_0.1-3           rstudioapi_0.9.0         
##   [9] svUnit_0.7-12             DT_0.5                    fansi_0.4.0               lubridate_1.7.4          
##  [13] xml2_1.2.0                codetools_0.2-16          splines_3.5.2             shinythemes_1.1.2        
##  [17] bayesplot_1.6.0           jsonlite_1.6              nloptr_1.2.1              broom_0.5.1              
##  [21] shiny_1.2.0               compiler_3.5.2            httr_1.4.0                backports_1.1.3          
##  [25] assertthat_0.2.0          Matrix_1.2-15             lazyeval_0.2.1            cli_1.0.1                
##  [29] later_0.8.0               htmltools_0.3.6           prettyunits_1.0.2         tools_3.5.2              
##  [33] igraph_1.2.4              coda_0.19-2               gtable_0.2.0              glue_1.3.0               
##  [37] reshape2_1.4.3            Rcpp_1.0.0                cellranger_1.1.0          nlme_3.1-137             
##  [41] crosstalk_1.0.0           xfun_0.4                  ps_1.3.0                  lme4_1.1-20              
##  [45] rvest_0.3.2               mime_0.6                  miniUI_0.1.1.1            tidybayes_1.0.3          
##  [49] gtools_3.8.1              MASS_7.3-51.1             zoo_1.8-4                 scales_1.0.0             
##  [53] rstanarm_2.18.2           colourpicker_1.0          hms_0.4.2                 promises_1.0.1           
##  [57] inline_0.3.15             shinystan_2.5.0           yaml_2.2.0                gridExtra_2.3            
##  [61] loo_2.0.0                 stringi_1.3.1             dygraphs_1.1.1.6          pkgbuild_1.0.2           
##  [65] bibtex_0.4.2              rlang_0.3.1               pkgconfig_2.0.2           matrixStats_0.54.0       
##  [69] evaluate_0.13             lattice_0.20-38           rstantools_1.5.1          htmlwidgets_1.3          
##  [73] labeling_0.3              processx_3.2.1            tidyselect_0.2.5          biostan_0.1.0            
##  [77] plyr_1.8.4                magrittr_1.5              R6_2.4.0                  generics_0.0.2           
##  [81] pillar_1.3.1              haven_2.0.0               withr_2.1.2               xts_0.11-2               
##  [85] survival_2.43-3           modelr_0.1.3              crayon_1.3.4              arrayhelpers_1.0-20160527
##  [89] KernSmooth_2.23-15        utf8_1.1.4                rmarkdown_1.11            grid_3.5.2               
##  [93] readxl_1.2.0              callr_3.1.1               threejs_0.3.1             digest_0.6.18            
##  [97] xtable_1.8-3              httpuv_1.4.5.1            stats4_3.5.2              munsell_0.5.0            
## [101] shinyjs_1.0
# Report wall-clock run time and the parallel backend configuration.
# `start_time` is captured at the top of the document; `units = "auto"`
# lets difftime pick a readable unit, echoed via its "units" attribute.
finished_at <- Sys.time()
elapsed <- difftime(finished_at, start_time, units = "auto")
cat("Started  ", as.character(start_time), "\n",
    "Finished ", as.character(finished_at), "\n",
    "Time difference of ", elapsed, " ", attr(elapsed, "units"), "\n",
    "Used ", foreach::getDoParWorkers(), " cores\n",
    "Used ", foreach::getDoParName(), " as backend\n",
    sep = "")
## Started  2019-04-27 06:22:03
## Finished 2019-04-27 06:34:00
## Time difference of 11.9426 mins
## Used 12 cores
## Used doParallelMC as backend